package com.dscyy.readutil;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Writer;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.HashSet;
import java.util.Set;
/**
 * Small command-line utility around a paginated company-search listing.
 *
 * <p>In its current state {@link #main(String[])} only prints one {@code wget}
 * command per result page (pages 1 to 9) and writes a summary report to
 * {@code D:\chuanzhong.txt}; the actual e-mail scraping call is commented out,
 * so the collected set is always empty.
 *
 * <p>NOTE(review): the original header referred to the "传众" directory at
 * http://www.czvv.com/s0k6aKN5rOJ5Yy65Li05rKC5ZWG5Z+Op0.html, while the code
 * targets www.biz72.com - confirm which site is intended.
 *
 * @author dscyy
 */
public class DownSWLMNamesInfo {

	/**
	 * Prints the {@code wget} commands for the result pages and writes the
	 * summary report file.
	 *
	 * @param args unused
	 * @throws Exception any I/O failure is propagated to the JVM unchanged; the
	 *         report writer is still closed in that case
	 */
	public static void main(String[] args) throws Exception {
		// Primitive long: no reason to box the timestamps.
		long startTime = System.currentTimeMillis();

		// URL prefix of the search result pages; "<page>.html" is appended per page.
		String webaddress = "http://www.biz72.com/search/corp/k-%E9%A2%8D%E6%B3%89%E5%8C%BA%E4%B8%B4%E6%B2%82%E5%95%86%E5%9F%8E_p-";
		// Report file in the root of drive D.
		File file = new File("D:" + File.separator + "chuanzhong.txt");
		Set<String> allEmails = new HashSet<String>();
		for (int i = 1; i < 10; i++) {
			String web = webaddress + i + ".html";
			System.out.println("wget "+ "-O "+i+".html " + web);
			// Scraping is currently disabled; only the wget command is emitted.
//			allEmails.addAll(scrspyEmails(web));
		}
//		scrspyEmails("http://yqqlyscc.cn.biz72.com/");

		Writer outWriter = new FileWriter(file);
		try {
			for (String s : allEmails) {
				outWriter.write(s + "\r\n");
			}

			// Elapsed time is measured before the summary lines are written.
			long stopTime = System.currentTimeMillis();
			String useTime = (stopTime - startTime) + "";
			outWriter.write("--------------------------------------------------------\r\n");
			outWriter.write("本次爬取页面地址：" + webaddress + "\r\n");
			outWriter.write("爬取用时：" + useTime + "毫秒\r\n");
			outWriter.write("本次共得到邮箱：" + allEmails.size() + "条\r\n");
			outWriter.write("****谢谢您的使用****\r\n");
			outWriter.write("--------------------------------------------------------");
		} finally {
			// Always release the file handle, even when a write fails.
			outWriter.close();
		}

		// NOTE(review): the message below mentions a "test" document although the
		// report is written to chuanzhong.txt - confirm the intended wording.
		System.out.println(" —————————————————————\t");
		System.out.println("|页面爬取成功，请到D盘根目录下查看test文档|\t");
		System.out.println("|                                         |");
		System.out.println("|如需重新爬取，请再次执行程序,谢谢您的使用|\t");
		System.out.println(" —————————————————————\t");
	}

	/**
	 * Downloads the given page and echoes every line of it to standard output.
	 *
	 * <p>No e-mail extraction is implemented yet, so the returned set is always
	 * empty. Any failure (malformed URL, connection or read error) is reported
	 * via {@code printStackTrace()} and does not propagate to the caller.
	 *
	 * @param webaddress absolute URL of the page to fetch
	 * @return a new, empty, mutable set
	 */
	private static Set<String> scrspyEmails(String webaddress) {
		Set<String> data = new HashSet<String>();
		BufferedReader buff = null;
		try {
			URL url = new URL(webaddress);
			URLConnection conn = url.openConnection();

			// The response is decoded with the platform default charset.
			buff = new BufferedReader(new InputStreamReader(
					conn.getInputStream()));
			String line = null;
			while ((line = buff.readLine()) != null) {
				System.out.println(line);
			}
		} catch (Exception e) {
			// Best effort: report the problem and fall through to return the empty set.
			e.printStackTrace();
		} finally {
			// Close the reader on every path so a failed read cannot leak the connection.
			if (buff != null) {
				try {
					buff.close();
				} catch (Exception e) {
					e.printStackTrace();
				}
			}
		}
		return data;
	}

}